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Comparison of generated codes by change of i 



Default 


Designate "-mmax-gp-datasize=40"


int a[8];
int c[10];


int a[8];
int c[10];


int sample(void) 
{ 


int sample(void)
a[7] = c[0] + c[9];


a[7] = c[0] + c[9]; 


return a[7]; 

} 


return a[7];

}
i 1 


ld r1,(gp,_c$-.MN.gptop);;
setlo r0,LO(_c+36);;
sethi r0,HI(_c+36);;
ld r1,(r1);;
ld r0,(r0);;

add r0,r1,r0;;

st (gp,_a - .MN.gptop+28),r0
ret;;


ld r1,(gp,_c - .MN.gptop);;
ld r0,(gp,_c - .MN.gptop+36);;

add r0,r1,r0;;

st (gp,_a - .MN.gptop+28),r0
ret;;


10 cycles
8 bytes ,' 


7 cycles | 
5 bytes 1 
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Wien maximum data size to be allocated to gp region is 40 



No size designation Defined 
outside of file 



extern int a[ ]; 
extern int c[ ]; 



{ 



int sample(void) 
a[7] = c[0] + c[9];
return a[7];



} 



setlo r0,LO(_c);;
setlo r1,LO(_c+36)
sethi r0,HI(_c);;
sethi r1,HI(_c+36);;
ld r3,(r0);;
setlo r2,LO(_a+28)
ld r0,(r1);;
sethi r2,HI(_a+28);;
add r1,r3,r0;;
mov r0,r1
st (r2),r1
ret ;;

10 cycles 
12 bytes 



Defined inside of file/Size 
designation Defined outside of file I 

int a[8]; 

extern int c[10];



{ 



} 



int sample(void) 
a[7] = c[0] + c[9];
return a[7];



ld r1,(gp,_c-.MN.gptop);;

ld r0,(gp,_c-.MN.gptop+36);;
add r0,r1,r0;;

st (gp,_a - .MN.gptop+28),r0

ret;;
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bytes 



Fig. 42 

When maximum data size to be allocated to gp region is default of 32 



Size designation Defined outside
of file/ Defined inside of file
"#pragma _no_gp_access"
directive


No size designation Defined outside
of file/ Defined inside of file
"#pragma _gp_access" directive


#pragma _no_gp_access a, c 


#pragma _gp_access a, c


extern int a[8]; 
int c[10];


extern int a[];
int c[10];


int sample(void) 
{ 


int sample(void)

{

a[7] = c[0] + c[9];


a[7] = c[0] + c[9]; 


return a[7];

}


return a[7];
}


setlo r0,LO(_c);;
setlo r1,LO(_c+36)
sethi r0,HI(_c);;
sethi r1,HI(_c+36);;
ld r3,(r0);;
setlo r2,LO(_a+28)
ld r0,(r1);;

sethi r2,HI(_a+28);;
add r1,r3,r0;;
mov r0,r1
st (r2),r1

ret ;;


ld r1,(gp,_c - .MN.gptop);;
ld r0,(gp,_c - .MN.gptop+36);;
add r0,r1,r0;;

st (gp,_a - .MN.gptop+28),r0
ret;;


10 cycles
12 bytes
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Compilation with compile option -0 



Default 



int a[100];
int b[100];

void sample(int c)



{ 



int i; 

for(i=0; i<100; i++) {
a[i] = b[i] + c;

} 



ld r5,(gp,_x$-.MN.gptop);;
mov r1,98

settar C6,C4,L00030

ld r4,(gp,_v$-.MN.gptop);;
L00030

[C4] add r2,r3,r0

[C6] ld r3,(r5+);;

[C4] st (r4+),r2

[C6] jloop C6,C4,tar,r1,-1;;



ret



"#pragma _software_pipelining_with_proepi"

directive

int a[100];
int b[100];
void sample(int c) 



{



int i; 



#pragma _software_pipelining_with_proepi
for(i=0; i<100; i++) { 
a[i] = b[i] + c; 

} 

} 

ld r5,(gp,_x$ - .MN.gptop);;
ld r4,(r5+);;
mov r1,96
settar C6,L00023

ld r3,(gp,_v$ - .MN.gptop);;
L00023

add r2,r4,r0
ld r4,(r5+);;
st (r3+),r2
[C6] jloop C6,C4,tar,r1,-1;;
add r2,r4,r0;;
st (r3+),r2
ret



3+2X99 + 3 = 204 cycles 
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int a[100]; 
int b[100]; 
void sample(int c) 



{ 



int i, ret=0; 

for(i=0; i<100; i++) {
ret += a[i] * a[i]; 

} 

return ret; 



"#pragma _loop_unroll" directive 

int a[100];
int b[100]; 
void sample(int c) 
{ 

int i, ret=0; 
#pragma _loop_unroll 
for(i=0; i<100; i++) { 
ret += a[i] * a [i]; 

} 

return ret; 

} 



mov r0,0
mov r1,98;;
settar C6,C4,L00013

ld r3,(gp,_a$ - .MN.gptop)
mul m0,r2,gp,0;;
L00013

[C4] mac m0,r0,r2,r2,m0
[C6] ld r2,(r3+)



[C6] jloop C6,C4,tar,r1,-1;;
ret ;;

2 + 2X101 + 3 = 207 cycles 
9 bytes 



ld r3,(gp,_a$ - .MN.gptop);;
mov r0,0
mov r1,48;;

settar C6,C2:C4,L00013
add r5,r3,4

mul m0,r2,gp,0;;
L00013

[C2] mac m0,r0,r2,r2,m0
[C3] add r3,r3,8
[C3] ld r2,(r5+)8;;
[C3] mac m0,r0,r4,r4,m0
[C4] ld r4,(r3)

[C6] jloop C6,C2:C4,tar,r1,-1;;
ret;;

3 + 2X 52 + 3 = 110 cycles 
13 bytes 



Fig. 48 



Default 



int a[100];
int b[100];
void sample(int c) 
{ 

int i; 

int *pa=a; 
int *pb=b; 



} 



} 



for(i=0; i<100; i++) { 

*pa++ = i * c >> *pb++;



mov r4,0 

ld r6,(gp,_a$ - .MN.gptop)
mov r1,98

settar C6,C4,L00025
ld r5,(gp,_b$ - .MN.gptop);;
L00025
[C4] asr r2,r4,r3;;



[C6] ld r3,(r5+);;
[C4] add r4,r4,r0
[C4] st (r6+),r2
[C6] jloop C6,C4,tar,r1,-1;;
ret ;;

2 + 3X101 +3 = 308 cycles 
11 bytes



"#pragma _loop_unroll" directive

and "#pragma _align_local_pointer"
directive



int a[100];
int b[100];
void sample(int c) 
{ 

int i; 

#pragma _align_local_pointer=8 pa, pb

int *pa=a; 

int *pb=b; 
#pragma _loop_unroll 

for(i=0; i<100; i++) {
*pa++ = i * c >> *pb++;



} 



mov r6,0

ld r8,(gp,_a$ - .MN.gptop);;
mov r1,48

settar C6,C4,L00016
ld r7,(gp,_b$ - .MN.gptop);;
L00016

[C2] add r6,r6,r0;;
[C2] stp (r8+),r2:r3
[C3] asr r2,r6,r4
[C3] add r6,r6,r0;;
[C3] asr r3,r6,r5
[C4] ldp r4:r5,(r7+)

[C6] jloop C6,C2:C4,tar,r1,-1;;
ret;;

2 + 3X 51 + 3 = 158 cycles
bytes 



Fig. 49 



Without "#pragma _min_iteration"
directive
int a[101];
int b[101];

void sample(int c, int end) 



{ 



} 



int i; 

int *pa=a; 
int *pb=b; 

for (i = 0; i < end; i++) {
*pa++ = i * c >> *pb++;



*pa = end; 



} 



With "#pragma _min_iteration" 
directive 
int a[101]; 
int b[101]; 
void sample(int c, int end)

int i;

int *pa=a;
int *pb=b;
#pragma _min_iteration=4
for (i = 0; i < end; i++) {
*pa++ = i * c >> *pb++;

*pa = end; 



cmple C0,r1,0

ld r5,(gp,_a$-.MN.gptop);;
mov r4,0

ld r6,(gp,_b$-.MN.gptop)
[C0] br L00016;;
mov r3,0
settar L00017
L00017

ld r2,(r6+);;
add r4,1;;
cmplt C0,r4,r1
asr r2,r3,r2;;
add r3,r3,r0
st (r5+),r2
[C0] jmpf tar;;
L00016

st (r5),r1
ret ;;

2 + 4X100 + 3 = 405 cycles 
16 bytes 



mov r5,0

ld r7,(gp,_a$-.MN.gptop);;
settar C6,C4,L00027
sub r2,r1,2
ld r6,(gp,_b$ - .MN.gptop);;



L00027

[C4] asr r3,r5,r4;;
[C6] ld r4,(r6+);;
[C4] add r5,r5,r0
[C4] st (r7+),r3
[C6] jloop C6,C4,tar,r2,-1;;



st (r7),r1
ret ;;



2 + 3X 101 + 3 = 308 cycles
12 bytes



Fig. 50 



Example of loop unrolling being impossible 



C language source with unknown 
number of loops 

void sample(int c, int end)



int i; 



#pragma _align_local_pointer=8 pa, pb
int *pa=a; 

int *pb=b; 
#pragma _loop_unroll 
#pragma _min_iteration=4 

for (i = 0; i < end; i++) {
*pa++ = i * c >> *pb++;

*pa = end; 



} 



Result of compilation 
mov r5,0

ld r7,(gp,_a$ - .MN.gptop);;
settar C6,C4,L00026
sub r2,r1,2

ld r6,(gp,_b$ - .MN.gptop);;
L00026

[C4] asr r3,r5,r4;;
[C6] ld r4,(r6+);;
[C4] add r5,r5,r0
[C4] st (r7+),r3
[C6] jloop C6,C4,tar,r2,-1;;

st (r7),r1

ret ;;



"#pragma _iteration_even"
directive

int a[101];
int b[101];

void sample(int c, int end) 



Fig. 51 



{ 



"#pragma _iteration_odd"
directive

int a[101];
int b[101];

void sample(int c, int end) 



int i; 



if 



} 



#pragma_align_local_pointer=8 pa 

int *pa=a; 

int *pb=b; 
#pragma _min_iteration=50 
#pragma _loop_unroll 
#pragma _iteration_even 

for (i = 0; i <end; i++) { 
*pa++ = i * c >> *pb++;

*pa = end; 



Pb 



int i; 



sub r2,r1,6;;
mov r6,0
asr r2,1

ld r7,(gp,_a$ - .MN.gptop);;
settar C6,C4,L00036
add r2,1

ld r8,(gp,_b$ - .MN.gptop);;
L00036

[C4] asr r3,r6,r4;;
[C4] add r6,r6,r0
[C4] st (r7+),r3;;
[C4] asr r3,r6,r5;;
[C6] ldp r4:r5,(r8+);;
[C4] add r6,r6,r0
[C4] st (r7+),r3
[C6] jloop C6,C4,tar,r2,-1;;



st (r7),r1
ret ;;



#pragma _align_local_pointer=8 pa, pb
int *pa=a;

int *pb=b;
#pragma _min_iteration=50
#pragma _loop_unroll
#pragma _iteration_odd
for (i = 0; i < end; i++) {
*pa++ = i * c >> *pb++;

*pa = end; 



!) 



sub r2,r1,7;;
mov r6,0
asr r2,1

ld r7,(gp,_a$ - .MN.gptop);;
settar C6,C4,L00046
add r2,1

ld r8,(gp,_b$ - .MN.gptop);;
L00046

[C4] asr r3,r6,r4;;
[C4] add r6,r6,r0
[C4] st (r7+),r3;;
[C4] asr r3,r6,r5;;
[C6] ldp r4:r5,(r8+);;
[C4] add r6,r6,r0
[C4] st (r7+),r3
[C6] jloop C6,C4,tar,r2,-1;;
ld r2,(r8+);;
asr r2,r6,r2;;
st (r7+),r2;;
st (r7),r1
ret ;;
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No designation of alignment 


Designation of option
"-falign_short_array=4"


short src[8]; 
short dst[8]; 


No change


void align1(void)
{

char dummy;
int i;






for(i=0;i<8;i+=2) 

dst[i]=src[i] + src[i+1]; 




addu r5,gp,_src - .MN.gptop

addu r4,gp,_src - .MN.gptop+2 ;;

addu r3,gp,_dst - .MN.gptop

mov r0,2

settar C6,C4,L00005 ;;
L00005

[C4] ldh r1,(r4+)4 ;; //As alignment is unknown,
[C4] add r1,r2,r1 //two data are loaded
[C6] ldh r2,(r5+)4 ;; //independently (ldh instruction)
[C4] sth (r3+)4,r1

[C6] jloop C6,C4,tar,r0,-1 ;; //L00005
ret ;;


addu r5,gp,_src - .MN.gptop ;;
addu r4,gp,_dst - .MN.gptop

settar C6,C4,L00005 ;;
L00005

add r1,r2,r3 //As alignment is 4,
[C6] ldhp r2:r3,(r5+) ;; //pair access instruction
[C4] sth (r4+)4,r1 //(ldhp) is generated.
[C6] jloop C6,C4,tar,r0,-1 ;; //L00005

ret ;;


2+4X5+3=25 cycles
22 bytes


2+2X5+3=15 cycles
8 bytes



Fig. 56 



No designation of alignment 
int align2(short *src)



int v = 0 ;

for(int i=0; i<100; i+=2){
v += src[i] * src[i+1];

return v ;



mov r5,0 
mov r1,48 

settar C6,C4,L00005 ;;
mov r4,r0

add r3,r0,2
mul m0,r2,gp,0 ;;
L00005 

W Wh ;; f ^ 

(C4J lmacmOAf2,ri),mO//As 
[C6] ldh r2,(r4+)4

[C6] jloop C6,C4,tar,r1,-1 //L00005
mov r0,r5

ret ;; 

2+3X51+3=160 cycles 
24 bytes 



lining, 
is unknown, 



#pragma _align_parm_pointer=4 src
int align2(short *src)



int v = 0;

for(int i=0; i<100; i+=2){
v += src[i] * src[i+1];

return v;

mov r4,0 ;;
mov r1,48



L00005 



mul mO.^.gp.O 



//Apply software pipelining.

""^am //Pairaccess instruction is usable I 
j'oop m,W ;;//L00005 
mov r0,r4
ret ;;



[C6J 
HI 



2+2X51+3=107 cycles 



8 



bytes 



No designation of alignment 



Fig. 57 



void align3(int n) 



{ 



short * from ; 
short * to ; 
int i ; 

from = &(src[n]) ; to = &(dst[n]) ;

for(i=0; i<16; i++, from+=2, to+=2){
* to = * from ;

*(to+1) = *(from+1) ;



ld r3,(gp,_src$-.MN.gptop)



L00016 



ld r2,(gp,_dst$ - .MN.gptop)
add r1,r0,r0 ;;
mov r0,13
add r6,r3,r1
add r5,r2,r1 ;;
settar C6,L00016
add r4,r5,2
add r3,r6,2



ldh r1,(r6+)4

ldh r2,(r3+)4

sth (r5+)4,r1

sth (r4+)4,r2

[C6] jloop C6,tar,r0,-1

ret ;;



//L00016



"#pragma _align_local_pointer"
directive

void align3(int n)



{ 



#pragma _align_local_pointer=4 from, to

short * from ;

short * to ;
int i ;



from = &(src[n]) ; to = &(dst[n]) ;
for(i=0; i<16; i++, from+=2, to+=2){



*to = *from ;
*(to+1) = *(from+1) ;



L00016 



ld r3,(gp,_src$ - .MN.gptop)
ld r2,(gp,_dst$ - .MN.gptop)
mov r1,13
add r0,r0,r0 ;;
settar C6,L00016
add r4,r3,r0
add r0,r2,r0 ;;



ldhp r2:r3,(r4+) ;;
sthp (r0+),r2:r3
[C6] jloop C6,tar,r1,-1
ret ;;



//L00016 



